import os
from pathlib import Path

from langchain_community.document_loaders import (
    DirectoryLoader,
    UnstructuredFileLoader,
)
from langchain_core.documents import Document


def load_documents(data_path: str) -> list[Document]:
    """Load every supported document found directly inside a directory.

    Files whose names match one of the supported extensions (``.xlsx``,
    ``.xls``, ``.csv``, ``.docx``, ``.pdf``) are parsed with
    ``UnstructuredFileLoader``. Sub-directories are not searched, because
    ``DirectoryLoader`` is used with its default (non-recursive) setting.

    Args:
        data_path: Path to the directory that contains the files to load.

    Returns:
        The documents produced for all matching files, grouped by extension
        in the order the patterns are listed below.

    Raises:
        ValueError: If ``data_path`` does not exist or is not a directory.
    """
    path = Path(data_path)
    # is_dir() is already False for a missing path, so one check covers both
    # the "does not exist" and the "is not a directory" cases.
    if not path.is_dir():
        raise ValueError(f"The path '{data_path}' is not a valid directory.")

    print(f"Loading documents from {data_path}...")

    # DirectoryLoader matches files with pathlib-style globbing, which has no
    # "|" alternation: a pattern such as "*.xlsx|*.csv" is treated as one
    # literal pattern and matches nothing. Run one loader per pattern instead.
    # Extend this tuple to support more file types.
    patterns = ("*.xlsx", "*.xls", "*.csv", "*.docx", "*.pdf")

    documents: list[Document] = []
    for pattern in patterns:
        loader = DirectoryLoader(
            str(path),
            glob=pattern,
            show_progress=True,
            use_multithreading=True,
            loader_cls=UnstructuredFileLoader,
        )
        documents.extend(loader.load())

    print(f"Loaded {len(documents)} documents.")
    return documents
